{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-05-13T07:06:58.169128Z",
     "start_time": "2024-05-13T07:05:14.138211Z"
    }
   },
   "source": [
    "# 使用llama3模型，llama3本地模型使用[ollama](https://ollama.com)安装\n",
    "from langchain_community.llms import Ollama\n",
    "llm = Ollama(model=\"llama3\")\n",
    "\n",
    "# 加载提示模板\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "\n",
    "# 使用WebBaseLoader加载知识库\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "loader = WebBaseLoader(\"https://docs.smith.langchain.com/user_guide\")\n",
    "docs = loader.load()\n",
    "\n",
    "# 加载嵌入功能\n",
    "from langchain_community.embeddings import OllamaEmbeddings\n",
    "embeddings = OllamaEmbeddings()\n",
    "\n",
    "# 建立索引\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "text_splitter = RecursiveCharacterTextSplitter()\n",
    "documents = text_splitter.split_documents(docs)\n",
    "vector = FAISS.from_documents(documents, embeddings)\n",
    "\n",
    "# 加载检索器\n",
    "from langchain.chains import create_retrieval_chain\n",
    "retriever = vector.as_retriever()"
   ],
   "outputs": [],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T07:06:58.199984Z",
     "start_time": "2024-05-13T07:06:58.178134Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 创建一个新链。该链将接受最近的输入 ( input) 和对话历史记录 ( chat_history) 并使用 LLM 生成搜索查询。\n",
    "from langchain.chains import create_history_aware_retriever\n",
    "from langchain_core.prompts import MessagesPlaceholder\n",
    "\n",
    "# First we need a prompt that we can pass into an LLM to generate this search query\n",
    "\n",
    "prompt = ChatPromptTemplate.from_messages([\n",
    "    MessagesPlaceholder(variable_name=\"chat_history\"),\n",
    "    (\"user\", \"{input}\"),\n",
    "    (\"user\", \"Given the above conversation, generate a search query to look up to get information relevant to the conversation\")\n",
    "])\n",
    "retriever_chain = create_history_aware_retriever(llm, retriever, prompt)"
   ],
   "id": "7716bd3cc63d2108",
   "outputs": [],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T07:08:04.066953Z",
     "start_time": "2024-05-13T07:06:58.207983Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 通过传递用户提出后续问题的实例来测试\n",
    "from langchain_core.messages import HumanMessage, AIMessage\n",
    "\n",
    "chat_history = [HumanMessage(content=\"Can LangSmith help test my LLM applications?\"), AIMessage(content=\"Yes!\")]\n",
    "retriever_chain.invoke({\n",
    "    \"chat_history\": chat_history,\n",
    "    \"input\": \"Tell me how\"\n",
    "})"
   ],
   "id": "89ccce97202d7",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content=\"Every playground run is logged in the system and can be used to create test cases or compare with other runs.Beta Testing\\u200bBeta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Capturing Feedback\\u200bWhen launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating Traces\\u200bLangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a Dataset\\u200bAs your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.Production\\u200bClosely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production.However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Online evaluations and automations allow you to process and score production traces in near real-time.Additionally, threads provide a seamless way to group traces from a single conversation, making it easier to track the performance of your application across multiple turns.Monitoring and A/B Testing\\u200bLangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Automations\\u200bAutomations are a powerful feature in LangSmith that allow you to perform actions on traces in near real-time. This can be used to automatically score traces, send them to annotation queues, or send them to datasets.To define an automation, simply provide a filter condition, a sampling rate, and an action to perform. Automations are particularly helpful for processing traces at production scale.Threads\\u200bMany LLM applications are multi-turn, meaning that they involve a series of interactions between the user and the application. LangSmith provides a threads view that groups traces from a single conversation together, making it easier to\", metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       " Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\\nThe ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.Debugging\\u200bWhen developing new LLM applications, we suggest having LangSmith tracing enabled by default.\\nOftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.\\nWe provide native rendering of chat messages, functions, and retrieve documents.Initial Test Set\\u200bWhile many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.\\nThese test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison View\\u200bWhen prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.\\nOftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.\\nIn order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.Playground\\u200bLangSmith provides a playground environment for rapid iteration and experimentation.\\nThis allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       " Document(page_content='meaning that they involve a series of interactions between the user and the application. LangSmith provides a threads view that groups traces from a single conversation together, making it easier to track the performance of and annotate your application across multiple turns.Was this page helpful?PreviousQuick StartNextOverviewPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright © 2024 LangChain, Inc.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       " Document(page_content='LangSmith User Guide | 🦜️🛠️ LangSmith', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'})]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T07:19:15.964014Z",
     "start_time": "2024-05-13T07:08:04.070953Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 创建一个新的链来继续与这些检索到的文档进行对话\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "prompt = ChatPromptTemplate.from_messages([\n",
    "    (\"system\", \"Answer the user's questions based on the below context:\\n\\n{context}\"),\n",
    "    MessagesPlaceholder(variable_name=\"chat_history\"),\n",
    "    (\"user\", \"{input}\"),\n",
    "])\n",
    "document_chain = create_stuff_documents_chain(llm, prompt)\n",
    "retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)\n",
    "\n",
    "# 测试\n",
    "chat_history = [HumanMessage(content=\"Can LangSmith help test my LLM applications?\"), AIMessage(content=\"Yes!\")]\n",
    "retrieval_chain.invoke({\n",
    "    \"chat_history\": chat_history,\n",
    "    \"input\": \"Tell me how\"\n",
    "})"
   ],
   "id": "2d1fd00d6297105",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content='Can LangSmith help test my LLM applications?'),\n",
       "  AIMessage(content='Yes!')],\n",
       " 'input': 'Tell me how',\n",
       " 'context': [Document(page_content=\"Every playground run is logged in the system and can be used to create test cases or compare with other runs.Beta Testing\\u200bBeta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Capturing Feedback\\u200bWhen launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating Traces\\u200bLangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a Dataset\\u200bAs your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.Production\\u200bClosely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production.However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Online evaluations and automations allow you to process and score production traces in near real-time.Additionally, threads provide a seamless way to group traces from a single conversation, making it easier to track the performance of your application across multiple turns.Monitoring and A/B Testing\\u200bLangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Automations\\u200bAutomations are a powerful feature in LangSmith that allow you to perform actions on traces in near real-time. This can be used to automatically score traces, send them to annotation queues, or send them to datasets.To define an automation, simply provide a filter condition, a sampling rate, and an action to perform. Automations are particularly helpful for processing traces at production scale.Threads\\u200bMany LLM applications are multi-turn, meaning that they involve a series of interactions between the user and the application. LangSmith provides a threads view that groups traces from a single conversation together, making it easier to\", metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       "  Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\\nThe ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.Debugging\\u200bWhen developing new LLM applications, we suggest having LangSmith tracing enabled by default.\\nOftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.\\nWe provide native rendering of chat messages, functions, and retrieve documents.Initial Test Set\\u200bWhile many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.\\nThese test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison View\\u200bWhen prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.\\nOftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.\\nIn order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.Playground\\u200bLangSmith provides a playground environment for rapid iteration and experimentation.\\nThis allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       "  Document(page_content='meaning that they involve a series of interactions between the user and the application. LangSmith provides a threads view that groups traces from a single conversation together, making it easier to track the performance of and annotate your application across multiple turns.Was this page helpful?PreviousQuick StartNextOverviewPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright © 2024 LangChain, Inc.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),\n",
       "  Document(page_content='LangSmith User Guide | 🦜️🛠️ LangSmith', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'})],\n",
       " 'answer': \"Yes, LangSmith can definitely help you test your LLM (Large Language Model) applications. Here's a breakdown of the various workflows and features that Langsmith provides to aid in testing:\\n\\n**Prototyping**: Use LangSmith's playground environment for rapid iteration and experimentation. Quickly test out different prompts and models, and open the playground from any prompt or model run in your trace.\\n\\n**Initial Test Set**: Create datasets, which are collections of inputs and reference outputs, and use these to run tests on your LLM applications. You can upload test cases in bulk, create them on the fly, or export them from application traces. LangSmith also allows for custom evaluations (both LLM and heuristic-based) to score test results.\\n\\n**Debugging**: When things go wrong with your LLM application, LangSmith gives you clear visibility and debugging information at each step of an LLM sequence, making it easier to identify and root-cause issues. Native rendering of chat messages, functions, and retrieve documents helps in debugging.\\n\\n**Beta Testing**: Use LangSmith's beta testing features to collect more data on how your LLM applications are performing in real-world scenarios. Develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it's breaking down in those cases. Both feedback collection and run annotation are critical for this workflow.\\n\\n**Capturing Feedback**: Gather human feedback on the responses produced by your application, which helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. Attach feedback scores to logged traces and filter on traces that have a specific feedback tag and score.\\n\\n**Annotating Traces**: Send runs to annotation queues, which allow annotators (PMs, engineers, or subject matter experts) to closely inspect interesting traces and annotate them with respect to different criteria. This allows you to catch regressions across important evaluation criteria.\\n\\n**Adding Runs to a Dataset**: As your application progresses through the beta testing phase, continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios.\\n\\nThese are just some of the ways Langsmith can help you test your LLM applications. If you have any further questions or need more information, feel free to ask!\"}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 8
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
